########################################################
### Script to Alamos Concha, Priscilla 2018:
### Large-Scale contentious politics 
########################################################


## The R code in this file tests alternative anchors for calibration

## For replication purposes of PHYSREP and CIVIREP, please note that the analysis uses the raw data of:
# (David L. Cingranelli and David L. Richards. 1999. "Measuring the Level, Pattern, and Sequence of 
#  Government Respect for Physical Integrity Rights." International
# Studies Quarterly, Vol 43.2: 407-18).
# Cingranelli, David L., David L. Richards, and K. Chad Clay. 2014. "The CIRI Human Rights Dataset."
# Available online at http://www.humanrightsdata.com. Version 2014.04.14).

## The R code was written by using the packages QCA 3.1, 3.3 and SetMethods 2.3, 2.4


######################################################
### Double check and test alternative calibrations ###
######################################################


rm(list = ls())

# Set working directory:

setwd("D:/UCLouvain/PhD Thesis/PhD/2018/Dataset/Robustness")
Don <-read.csv("Raw data cal_rob.csv", row.names=1)


# load packages
library(QCA); library(QCAGUI); library(SetMethods); library(lattice);
library(arm); library(plyr); library(car); library(stringr); library(xtable)
library(betareg); library (gmodels); library (Hmisc); library (MASS);
library (memisc); library (polycor); library (psych); library (reshape); 
library (VIM); library (XML); library (foreign); library (directlabels);
library (SetMethods); dependencies = TRUE

# we test only fuzzy sets, that is
# 1. PHYSREP
# 2. CIVREP

# our anchors so far are (fully out/cross over/fully in):
# 1, 4.6, 6,   # PHYSREP low
# 2, 5.4, 9,   # CIVREP low

#	1. let us first look at the calibration of 
# PHYSREP, so far anchors at 1, 4.6, 6 (th_1)

#	the raw scores look like this:
summary(Don$PHYSREP)
# note: max is 6 here for this selection of ldp (low PHYSREP), 
# yet, the overall max of this indicator is 8, meaning no physrep

th_1 <- c(1, 4.6, 6)


# visualization of calibration see script on data preparation

conds <- c("PHYSREP","CIVREP")
conds_anchors <- matrix(c(1, 4.6, 6,
                          2, 5.4, 9),
                        ncol=3, byrow = TRUE)

for (i in 1:length(conds)) {
  Don[ ,11+i] <- calibrate(Don[ ,conds[i]], type = "fuzzy", logistic = TRUE, idm = 0.95,
                           thresholds = c("e"= conds_anchors[i,1],"c"= conds_anchors[i,2],"i"= conds_anchors[i,3]))
}

# name the new columns just added to the data, using their original name with "low" 
names(Don)[c(12:13)] <- paste0("LOW", conds)

#	now, let's try alternative calibrations 
th_2 <- c(3, 4, 5.5)
th_3 <- c(2.5, 5, 7)

#	make new fuzzy variables
Don$lowphysrep1 <- calibrate(Don$PHYSREP, 'fuzzy', thresholds=th_2, logistic=T)
Don$lowphysrep2 <- calibrate(Don$PHYSREP, 'fuzzy', thresholds=th_3, logistic=T)

#	compare the three calibrations:

par(mfrow=c(1, 3))
plot(Don$PHYSREP, Don$LOWPHYSREP, pch=19, col=rgb(0,0,1,0.5),
     main= "Old Calibration PHYSREP",
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))
plot(Don$PHYSREP, Don$lowphysrep1, pch=19, col=rgb(0,0,1,0.5),
     main='1st alternative calibration PHYSREP',
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))	
plot(Don$PHYSREP, Don$lowphysrep2, pch=19, col=rgb(0,0,1,0.5),
     main='2nd alternative calibration PHYSREP',
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))

# See cases per plot, 

rownames(subset(Don, LOWPHYSREP > 0.5))
rownames(subset(Don, lowphysrep1 > 0.5))
rownames(subset(Don, lowphysrep2 > 0.5))

#	now plot all alternatives with outcome 

par(mfrow=c(2, 2))
xy.plot(Don$LOWPHYSREP, Don$LSCONTPOL, necessity=F, 
        main='Old calibration',
        xlab='LOWPHYSREP_old', ylab='LSCONTPOL')
xy.plot(Don$lowphysrep1, Don$LSCONTPOL, necessity=F,
        main='1st alternative calibration',
        xlab='LOWPHYSREP_1', ylab='LSCONTPOL')
xy.plot(Don$lowphysrep2, Don$LSCONTPOL, necessity=F,
        main='2nd alternative calibration',
        xlab='LOWPHYSREP_2', ylab='LSCONTPOL')
plot.new()	

# For the outcome negated
par(mfrow=c(2, 2))
xy.plot(Don$LOWPHYSREP, 1- Don$LSCONTPOL, necessity=F, 
        main='Old calibration',
        xlab='LOWPHYSREP_old', ylab='�LSCONTPOL')
xy.plot(Don$lowphysrep1, 1- Don$LSCONTPOL, necessity=F,
        main='1st alternative calibration',
        xlab='LOWPHYSREP_1', ylab='�LSCONTPOL')
xy.plot(Don$lowphysrep2, 1- Don$LSCONTPOL, necessity=F,
        main='2nd alternative calibration',
        xlab='LOWPHYSREP_2', ylab='�LSCONTPOL')



#	now let's consider parameters of fit for sufficiency of low repression
#	for outcome = LSCONTPOL:

nf1 <- QCAfit(1- Don$LSCONTPOL, Don$LOWPHYSREP,  necessity=F)
nf2 <- QCAfit(1- Don$LSCONTPOL, Don$lowphysrep1, necessity=F)
nf3 <- QCAfit(1- Don$LSCONTPOL, Don$lowphysrep2, necessity=F)

rbind(nf1, nf2, nf3)

# use first alternative anchor for alternative QCA
# / outcome "no LSCONTPOL":

# to prevent confusion, erase uncalibrated columns:
Don[ ,(6:7)] <- NULL

# # Creation of LIMDESPOT as condition combined by OR [old civrep cal]

Don$LIMDESPOT <- pmax(Don$LOWPHYSREP, Don$LOWCIVREP)
Don$LIMDESPOT_1 <- pmax(Don$lowphysrep1, Don$LOWCIVREP)
Don$LIMDESPOT_2 <- pmax(Don$lowphysrep2, Don$LOWCIVREP)

# NAMES
rownames(subset(Don, LIMDESPOT > 0.5))

# [1] "Bahrain" "Kuwait"  "Oman"    "Qatar"   "UAE" 

rownames(subset(Don, LIMDESPOT_1 > 0.5))
# [1] "Algeria" "Bahrain" "Kuwait"  "Morocco" "Oman"    "Qatar"   "UAE" 

rownames(subset(Don, LIMDESPOT_2 > 0.5))
# [1] "Bahrain" "Kuwait"  "Oman"    "Qatar"   "UAE""

# LIMDESPOT_2 includes two more cases than old calibration: Algeria & Morocco
# Keeping the OR combination with CIVREP with old calibration.

# rename other colums:
Don <- rename(Don, c("LOWPHYSREP" = "PHYSREP", 
                     "LOWCIVREP" = "CIVREP"))


##################################################################
##################################################################

# Lets now look at the calibration of 
# civrep, so far anchors at 2, 5.4, 9 (th_1)

#	The raw scores look like this:
summary(Don$LOWCIVREP)

#	now, let's try alternative calibrations 
thc_2 <- c(3, 6, 8)
thc_3 <- c(2.5, 6.4, 8.5)

#	Make new fuzzy variables
Don$lowcivrep1 <- calibrate(Don$CIVREP, 'fuzzy', thresholds=thc_2, logistic=T)
Don$lowcivrep2 <- calibrate(Don$CIVREP, 'fuzzy', thresholds=thc_3, logistic=T)

#	Compare the three calibrations:

par(mfrow=c(1, 3))
plot(Don$CIVREP, Don$LOWCIVREP, pch=19, col=rgb(0,0,1,0.5),
     main= "Old Calibration CIVREP",
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))
plot(Don$CIVREP, Don$lowcivrep1, pch=19, col=rgb(0,0,1,0.5),
     main='1st alternative calibration CIVREP',
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))	
plot(Don$CIVREP, Don$lowcivrep2, pch=19, col=rgb(0,0,1,0.5),
     main='2nd alternative calibration CIVREP',
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))



# See cases per plot, 
rownames(subset(Don, CIVREP > 0.5))
rownames(subset(Don, lowcivrep1 > 0.5))
rownames(subset(Don, lowcivrep2 > 0.5))


#	now plot all alternatives with outcome 

par(mfrow=c(2, 2))
xy.plot(Don$LOWCIVREP, Don$LSCONTPOL, necessity=F, 
        main='Old calibration CIVREP',
        xlab='LOWCIVREP_old', ylab='LSCONTPOL')
xy.plot(Don$lowcivrep1, Don$LSCONTPOL, necessity=F,
        main='1st alternative calibration CIVREP',
        xlab='LOWCIVREP_1', ylab='LSCONTPOL')
xy.plot(Don$lowcivrep2, Don$LSCONTPOL, necessity=F,
        main='2nd alternative calibration CIVREP',
        xlab='LOWCIVREP_2', ylab='LSCONTPOL')
plot.new()	


#	now let's consider parameters of fit for sufficiency of low repression
#	for outcome = LSCONTPOL:

nf1 <- QCAfit(Don$LSCONTPOL, Don$LOWCIVREP,  necessity=F)
nf2 <- QCAfit(Don$LSCONTPOL, Don$lowcivrep1, necessity=F)
nf3 <- QCAfit(Don$LSCONTPOL, Don$lowcivrep2, necessity=F)

rbind(nf1, nf2, nf3)

#	now plot all alternatives with outcome negated

par(mfrow=c(2, 2))
xy.plot(Don$LOWCIVREP, 1- Don$LSCONTPOL, necessity=F, 
        main='Old calibration CIVREP',
        xlab='LOWCIVREP_old', ylab='�LSCONTPOL')
xy.plot(Don$lowcivrep1, 1- Don$LSCONTPOL, necessity=F,
        main='1st alternative calibration CIVREP',
        xlab='LOWCIVREP_1', ylab='�LSCONTPOL')
xy.plot(Don$lowcivrep2, 1- Don$LSCONTPOL, necessity=F,
        main='2nd alternative calibration CIVREP',
        xlab='LOWCIVREP_2', ylab='�LSCONTPOL')
plot.new()	


#	now let's consider parameters of fit for sufficiency of low repression
#	for outcome negated = � LSCONTPOL:

nf1 <- QCAfit(1- Don$LSCONTPOL, Don$LOWCIVREP,  necessity=F)
nf2 <- QCAfit(1- Don$LSCONTPOL, Don$lowcivrep1, necessity=F)
nf3 <- QCAfit(1- Don$LSCONTPOL, Don$lowcivrep2, necessity=F)

rbind(nf1, nf2, nf3)


# use first alternative anchor for alternative QCA
# / outcome "LSCONTPOL":

# Combine conditions by logical OR

# LIMDESPOT= combination of physrep and civrep
Don$LIMDESPOT <- pmax(Don$LOWPHYSREP, Don$LOWCIVREP) 
Don$LIMDESPOT_3 <- pmax(Don$lowphysrep1, Don$lowcivrep1) 
Don$LIMDESPOT_4 <- pmax(Don$lowphysrep2, Don$lowcivrep2) 

#	Creation of condition LIMDESPOT with logical OR,
# using the first alternative calibration of PHYSREP and CIVREP
# Compare the three calibrations:

par(mfrow=c(1, 2))
plot(Don$LIMDESPOT, Don$LIMDESPOT_3, pch=19, col=rgb(0,0,1,0.5),
     main='1st alternative calibration LIMDESPOT',
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))	
plot(Don$LIMDESPOT, Don$LIMDESPOT_4, pch=19, col=rgb(0,0,1,0.5),
     main='2nd alternative calibration LIMDESPOT',
     xlab='Raw score',
     ylab='Fuzzy score')
abline(h=0.5, col=rgb(.5,.5,.5,.5))


#	now plot all alternatives with outcome 

par(mfrow=c(2, 2))
xy.plot(Don$LIMDESPOT, Don$LSCONTPOL, necessity=F, 
        main='Old calibration LIMDESPOT',
        xlab='LIMDESPOT_old', ylab='LSCONTPOL')
xy.plot(Don$LIMDESPOT_3, Don$LSCONTPOL, necessity=F,
        main='1st alternative calibration LIMDESPOT',
        xlab='LIMDESPOT_3', ylab='LSCONTPOL')
xy.plot(Don$LIMDESPOT_4, Don$LSCONTPOL, necessity=F,
        main='2nd alternative calibration LIMDESPOT',
        xlab='LIMDESPOT_4', ylab='LSCONTPOL')
plot.new()	

# See cases per plot, 
rownames(subset(Don, LIMDESPOT > 0.5))
rownames(subset(Don, LIMDESPOT_3 > 0.5))
rownames(subset(Don, LIMDESPOT_4 > 0.5))

# check data
head(Don)


### No analysis of necessity for proximate conditions### 
#############################

### Analysis of sufficiency ###
###############################

# truth table
TT_y <- truthTable(Don, outcome = "LSCONTPOL", neg.out = FALSE,
                   conditions = c("NONHERED","LOWSTATCAP","MOBILIZ", 
                                  "MILBEHAVE","NONRESCUE","LIMDESPOT_3"),
                   incl.cut1 = 0.9, 
                   n.cut = 1,
                   show.cases = TRUE,
                   complete = FALSE,
                   sort.by = c("OUT", "incl", "n"))

TT_y

# consistency threshold 0.9
# n.cut = 1

# logical minimization
# conservative
sol_y_c <- minimize(Don, outcome = "LSCONTPOL", neg.out = FALSE,
                    conditions = c("NONHERED","LOWSTATCAP","MOBILIZ", 
                                   "MILBEHAVE","NONRESCUE","LIMDESPOT_3"),
                    incl.cut1 = 0.9, 
                    n.cut = 1,
                    show.cases = TRUE,
                    details = TRUE)
sol_y_c
# consistency is higher, yet, coverage is much lower
# furthermore, the old anchor reflects the meaning of the set much better


# most parsimonious solution
sol_yp <- minimize(Don, outcome = "LSCONTPOL",
                   conditions = c("NONHERED","LOWSTATCAP","MOBILIZ", 
                                  "MILBEHAVE","NONRESCUE","LIMDESPOT_3"),
                   incl.cut1 = 0.9,
                   n.cut = 1,
                   show.cases = TRUE,
                   details = TRUE,
                   row.dom = TRUE,
                   include = "?")
sol_yp

# simplifying assumptions
sol_yp$SA

# ESA ANALYSIS

esol_yp <- minimize(Don, outcome = "LSCONTPOL",
                    conditions = c("NONHERED","LOWSTATCAP","MOBILIZ", 
                                   "MILBEHAVE","NONRESCUE","LIMDESPOT_3"),
                    incl.cut1 = 0.9,
                    n.cut = 1,
                    show.cases = TRUE,
                    details = TRUE,
                    row.dom = TRUE,
                    include = "?",
                    omit = c(3:16, 19:32))
esol_yp

# simplifying assumptions
esol_yp$SA

# ESA Intermediate

esol_yi <- minimize(Don, outcome = "LSCONTPOL",
                    conditions = c("NONHERED","LOWSTATCAP","MOBILIZ", 
                                   "MILBEHAVE","NONRESCUE","LIMDESPOT_3"),
                    incl.cut1 = 0.9, 
                    n.cut = 1,
                    show.cases = TRUE,
                    details = TRUE, include = "?",
                    dir.exp = c("1","1","1","1","1","1"),
                    omit = c(3:16, 19:32))

esol_yi

# easy counterfactuals
esol_yi$i.sol$C1P1$EC

##############################################################################
##############################################################################

# Conclusion of the alternative calibrations:
# no alternative anchor changes the solution formula in a meaningful way
# (new solution terms have very similar coverage)
# the "old" anchors seem to represent the meaning of the sets better than the alternatives
# hence, the "old" anchors are robust!